

#################################################################################
# File Name:  	Rosenberg_et_al_replication_code.R
# Authors:		  Meital Rosenberg, Daniel Armanios, Michaël Aklin, Paulina Jaramillo
# Purpose:      Generate figures for the Rosenberg et al. (2019) artcile       
# Data input:   ACCESS-1 (Aklin et al. 2016)
# Date:         10/24/2019
#################################################################################

#################################################################################
# Libraries
#################################################################################

library("ggplot2")
library("maps")
library("mapdata")
library("maptools")
library("scales")
library("rgdal")
library("dplyr")
library("reshape2")
library("RColorBrewer")
library("dplyr")   
library("rstanarm")
library("shinystan")
library("sjPlot") # Plots for lmer
library("sjmisc") # Plots for lmer
library("sjlabelled") # Plots for lmer
library("bayesplot")
library("sensemakr")
library("rgeos")
library("maptools")
library("gpclib")
library("plyr")
library("cleangeo")
library("haven")

#################################################################################
# Data
#################################################################################

rm(list=ls())

# *** Update path to replication folder here ***
setwd("PATH_TO_REPLICATION_FOLDER_HERE")

# ACCESS data. Note: if RawDataHH_recoded.dta is not available, you need to run 
# Rosenberg_et_al_dataprocessing_code.do first. This file will generate a new
# version of RawDataHH_recoded.dta.

AccessData = read_dta("RawDataHH_recoded.dta")

#################################################################################
# Figure S1 (Map)
#################################################################################

#################################################################################
# Please note: the code to generate the map rests on data from IHDS. Please
# download the data from IHDS directly. Then:
#
# (1) Run the following in Stata
# keep if URBAN4_2011==2 | URBAN4_2011==3
# collapse (mean) FU1 [aweight = WT], by(STATEID)
# sum FU1
# replace FU1 = 100*FU1
# decode STATEID, gen(NAME_1)
# replace NAME_1 = substr(NAME_1, 1, length(NAME_1)-3)
#
# replace NAME_1 = "Dadra and Nagar Haveli" if NAME_1 == "Dadra+Nagar Haveli"
# replace NAME_1 = "Jammu and Kashmir" if NAME_1 == "Jammu & Kashmir"
# replace NAME_1 = "Daman and Diu" if NAME_1 == "Daman & Diu"
# replace NAME_1 = "NCT of Delhi" if NAME_1 == "Delhi"
# replace NAME_1 = "Odisha" if NAME_1 == "Orissa"
# replace NAME_1 = "Puducherry" if NAME_1 == "Pondicherry"
# 
# drop STATEID
#
# saveold "IHDS_India_State_Map.dta", replace v(12)
#
# (2) Un-comment and run the code below in R
#################################################################################

# India map
#IndiaMap = read.dta("IHDS_India_State_Map.dta")

# Shape file
# np_dist = readShapeSpatial("Map/IND_simplifiedV2/IND_adm1-1.shp")
# # VERIFY IT LOADED PROPERLY
# plot(np_dist)
# 
# # There are orphaned holes; these need to be cleaned
# #report <- clgeo_CollectionReport(np_dist) 
# #summary <- clgeo_SummaryReport(report)
# #issues <- report[report$valid == FALSE,]
# #issues
# np_dist = clgeo_Clean(np_dist) # Cleans up
# np_dist = fortify(np_dist, region = "NAME_1")
# 
# # Telangana was part of Andhra Pradesh until 2014
# np_dist$id[np_dist$id == "Telangana"] = "Andhra Pradesh"
# 
# # Indicator of who is in the sample
# np_dist$RatingArea = 0
# np_dist$RatingArea[np_dist$id == "Bihar"] = 1
# np_dist$RatingArea[np_dist$id == "Jharkhand"] = 1
# np_dist$RatingArea[np_dist$id == "Madhya Pradesh"] = 1
# np_dist$RatingArea[np_dist$id == "Odisha"] = 1
# np_dist$RatingArea[np_dist$id == "Uttar Pradesh"] = 1
# np_dist$RatingArea[np_dist$id == "West Bengal"] = 1
# 
# area_map = unionSpatialPolygons(np_dist, IDs = county_map$RatingArea)
# area_map = fortify(area_map)
# area_map$group = gsub(".1", "", x= area_map$group, fixed = T)
# 
# subset = subset(np_dist, id %in% c("Bihar", "Jharkhand", "Madhya Pradesh",
#                                    "Odisha", "Uttar Pradesh", "West Bengal","Telangana"))
# subset$id[subset$id == "Telangana"] = "Andhra Pradesh"
# 
# subset2 = subset(np_dist, id %in% c("Gujarat"))
# 
# #np_dist$id <- toupper(np_dist$id)  #change ids to uppercase
# plot = ggplot() + 
#   geom_map(data = IndiaMap, aes(map_id = NAME_1, fill = FU1), 
#                     map = np_dist,
#                     color = "white") + 
#   expand_limits(x = np_dist$long, y = np_dist$lat) +
#   coord_map() + 
#   scale_fill_continuous(name="HH Elect. (%)") + 
#   theme(axis.line=element_blank(),axis.text.x=element_blank(),
#         axis.text.y=element_blank(),axis.ticks=element_blank())+
#   xlab("") + ylab("")  +
#   geom_path(data=subset,aes(x = subset$long, y = subset$lat,group=group), color="red") +
#   geom_path(data=subset2,aes(x = subset2$long, y = subset2$lat,group=group), color="green")
# ggsave(plot, file = "./TablesFigures/MapIHDS.png")


#################################################################################
# Figure S2: Panel A
#################################################################################

# Men appliances
m1 = stan_glmer(formula = gender_male_total2 ~ 
                  m2_q55_grid +
                  (1 | m1_q11_village),
                data=AccessData,
                chains=4,
                iter=1000,
                prior = normal(location = 0, 
                               scale=1, 
                               autoscale = TRUE))
summary(m1)

# Women appliances
m2 = stan_glmer(formula = gender_female_total2 ~ 
                  m2_q55_grid +
                  (1 | m1_q11_village),
                data=AccessData,
                chains=4,
                iter=1000,
                prior = normal(location = 0, 
                               scale=1, 
                               autoscale = TRUE))
summary(m2)

# Extract posteriors
posteriorM1 = as.matrix(m1)
posteriorM2 = as.matrix(m2)

# Merging the two datasets to plot the posterior distributions in a single figure
extractM1 = posteriorM1[,2]
extractM2 = posteriorM2[,2]
extract = cbind(extractM1,extractM2)

# Small graph ()
plot = mcmc_areas(extract, 
                  pars = c("extractM1","extractM2"), 
                  prob = 0.9) + 
  labs(x = 'Effect of Grid (Parameter Distribution)') + 
  ggplot2::ylim("Female Appliances","Male Appliances")
ggsave(plot, file = "./TablesFigures/StanPosteriorBasic2.pdf")


#################################################################################
# Figure S2: Panel B
#################################################################################

# Men appliances
m3 = stan_glmer(formula = gender_male_total2 ~ 
                  m2_q55_1_grid_years2 +
                  (1 | m1_q11_village),
                data=AccessData,
                chains=4,
                iter=1000,
                prior = normal(location = 0, 
                               scale=1, 
                               autoscale = TRUE))
summary(m3)

# Women appliances
m4 = stan_glmer(formula = gender_female_total2 ~ 
                  m2_q55_1_grid_years +
                  (1 | m1_q11_village),
                data=AccessData,
                chains=4,
                iter=1000,
                prior = normal(location = 0, 
                               scale=1, 
                               autoscale = TRUE))
summary(m4)

# Extract posteriors
posteriorM3 = as.matrix(m3)
posteriorM4 = as.matrix(m4)
extractFull = cbind(posteriorM3[,2],posteriorM3[,3],posteriorM3[,4],
                    posteriorM4[,2],posteriorM4[,3],posteriorM4[,4])

mcmc_areas(extractFull, 
           pars = c("extractM3","extractM4"), 
           prob = 0.9) + 
  labs(x = 'Effect of Grid Availability (Years) (Parameter Distribution)') + 
  ggplot2::ylim("Female Appliances","Male Appliances")


# Merging the two datasets to plot the posterior distributions in a single figure
extractM3 = posteriorM3[,2]
extractM4 = posteriorM4[,2]
extract = cbind(extractM3,extractM4)

ggplot(data = as.data.frame(extract),aes(x=extractM3)) 
+geom_histogram(binwidth = 0.1)
+geom_histogram(aes(extractM4))

# Small graph ()
plot = mcmc_areas(extract, 
                  pars = c("extractM3","extractM4"), 
                  prob = 0.9) + 
  labs(x = 'Effect of Grid Availability (Years) (Parameter Distribution)') + 
  ggplot2::ylim("Female Appliances","Male Appliances")
ggsave(plot, file = "./TablesFigures/StanPosteriorBasicYears2.pdf")


#################################################################################
# Figure S2: Panel C
#################################################################################

# Men appliances
m5 = stan_glmer(formula = gender_male_total2 ~ 
                  m2_q68_elec +
                  (1 | m1_q11_village),
                data=AccessData,
                chains=4,
                iter=1000,
                prior = normal(location = 0, 
                               scale=1, 
                               autoscale = TRUE))
summary(m5)


# Women appliances
m6 = stan_glmer(formula = gender_female_total2 ~ 
                  m2_q68_elec +
                  (1 | m1_q11_village),
                data=AccessData,
                chains=4,
                iter=1000,
                prior = normal(location = 0, 
                               scale=1, 
                               autoscale = TRUE))
summary(m6)

# Extract posteriors
posteriorM5 = as.matrix(m5)
posteriorM6 = as.matrix(m6)
extractFull = cbind(posteriorM5[,2],posteriorM5[,3],posteriorM5[,4],
                    posteriorM6[,2],posteriorM6[,3],posteriorM6[,4])

mcmc_areas(extractFull, 
           pars = c("extractM3","extractM4"), 
           prob = 0.9) + 
  labs(x = 'Effect of Electrification (Parameter Distribution)') + 
  ggplot2::ylim("Female Appliances","Male Appliances")


# Merging the two datasets to plot the posterior distributions in a single figure
extractM5 = posteriorM5[,2]
extractM6 = posteriorM6[,2]
extract = cbind(extractM5,extractM6)

ggplot(data = as.data.frame(extract),aes(x=extractM5)) 
+geom_histogram(binwidth = 0.1)
+geom_histogram(aes(extractM6))

# Small graph ()
plot = mcmc_areas(extract, 
                  pars = c("extractM5","extractM6"), 
                  prob = 0.9) + 
  labs(x = 'Effect of Electrification (Parameter Distribution)') + 
  ggplot2::ylim("Female Appliances","Male Appliances")
ggsave(plot, file = "./TablesFigures/StanPosteriorElectrificationBasic2.pdf")


#################################################################################
# Sensitivity analysis
#################################################################################

# Generate new variable(s)
AccessData$logexp = log(AccessData$m1_q32_month_expenditure + 1)

# Run the linear model
model.male = lm(data=AccessData,
                gender_male_total2 ~ m2_q55_grid + logexp)

model.female = lm(data=AccessData,
                  gender_female_total2 ~ m2_q55_grid + logexp)

summary(model.male)

# Implement sensemakr
sensitivity.model.male = sensemakr(model.male, 
                                   benchmark_covariates = "logexp",
                                   treatment = "m2_q55_grid",
                                   kd = 1:3)


# Create plot with results
png("./Figures/sensmakr_male.png") 
plot(sensitivity.model.male,
     frame = FALSE,
     main="Robustness of male estimates")
dev.off()

# Same for female appliances: run the model and plot it.
sensitivity.model.female = sensemakr(model.female, 
                                   benchmark_covariates = "logexp",
                                   treatment = "m2_q55_grid",
                                   kd = 1:3)
png("./Figures/sensmakr_female.png") 
plot(sensitivity.model.female,
     frame = FALSE,
     main="Robustness of female estimates")
dev.off()


